Merge pull request #212 from knu/website_agent-force_encoding

Add :force_encoding support to WebsiteAgent.

Andrew Cantino il y a 11 ans
Parent
Commit
7d9279b871
2 fichiers modifiés avec 87 ajouts et 2 suppressions
  1. 20 1
      app/models/agents/website_agent.rb
  2. 67 1
      spec/models/agents/website_agent_spec.rb

+ 20 - 1
app/models/agents/website_agent.rb

@@ -42,6 +42,8 @@ module Agents
42 42
       Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent.  This is only used to set the "working" status.
43 43
 
44 44
       Set `uniqueness_look_back` to limit the number of events checked for uniqueness (typically for performance).  This defaults to the larger of #{UNIQUENESS_LOOK_BACK} or #{UNIQUENESS_FACTOR}x the number of detected received results.
45
+
46
+      Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset.
45 47
     MD
46 48
 
47 49
     event_description do
@@ -85,6 +87,19 @@ module Agents
85 87
       if options['uniqueness_look_back'].present?
86 88
         errors.add(:base, "Invalid uniqueness_look_back format") unless is_positive_integer?(options['uniqueness_look_back'])
87 89
       end
90
+
91
+      if (encoding = options['force_encoding']).present?
92
+        case encoding
93
+        when String
94
+          begin
95
+            Encoding.find(encoding)
96
+          rescue ArgumentError
97
+            errors.add(:base, "Unknown encoding: #{encoding.inspect}")
98
+          end
99
+        else
100
+          errors.add(:base, "force_encoding must be a string")
101
+        end
102
+      end
88 103
     end
89 104
 
90 105
     def check
@@ -99,7 +114,11 @@ module Agents
99 114
       end
100 115
 
101 116
       request.on_success do |response|
102
-        doc = parse(response.body)
117
+        body = response.body
118
+        if (encoding = options['force_encoding']).present?
119
+          body = body.encode(Encoding::UTF_8, encoding)
120
+        end
121
+        doc = parse(body)
103 122
 
104 123
         if extract_full_json?
105 124
           if store_payload!(previous_payloads(1), doc)

+ 67 - 1
spec/models/agents/website_agent_spec.rb

@@ -32,7 +32,17 @@ describe Agents::WebsiteAgent do
32 32
         lambda { @checker.save! }.should raise_error;
33 33
         @checker.options = @site
34 34
       end
35
-    
35
+
36
+      it "should validate the force_encoding option" do
37
+        @checker.options['force_encoding'] = 'UTF-8'
38
+        lambda { @checker.save! }.should_not raise_error;
39
+        @checker.options['force_encoding'] = ['UTF-8']
40
+        lambda { @checker.save! }.should raise_error;
41
+        @checker.options['force_encoding'] = 'UTF-42'
42
+        lambda { @checker.save! }.should raise_error;
43
+        @checker.options = @site
44
+      end
45
+
36 46
       it "should check for changes (and update Event.expires_at)" do
37 47
         lambda { @checker.check }.should change { Event.count }.by(1)
38 48
         event = Event.last
@@ -83,6 +93,62 @@ describe Agents::WebsiteAgent do
83 93
       end
84 94
     end
85 95
 
96
+    describe 'encoding' do
97
+      it 'should be forced with force_encoding option' do
98
+        huginn = "\u{601d}\u{8003}"
99
+        stub_request(:any, /no-encoding/).to_return(:body => {
100
+            :value => huginn,
101
+          }.to_json.encode(Encoding::EUC_JP), :headers => {
102
+            'Content-Type' => 'application/json',
103
+          }, :status => 200)
104
+        site = {
105
+          'name' => "Some JSON Response",
106
+          'expected_update_period_in_days' => 2,
107
+          'type' => "json",
108
+          'url' => "http://no-encoding.example.com",
109
+          'mode' => 'on_change',
110
+          'extract' => {
111
+            'value' => { 'path' => 'value' },
112
+          },
113
+          'force_encoding' => 'EUC-JP',
114
+        }
115
+        checker = Agents::WebsiteAgent.new(:name => "No Encoding Site", :options => site)
116
+        checker.user = users(:bob)
117
+        checker.save!
118
+
119
+        checker.check
120
+        event = Event.last
121
+        event.payload['value'].should == huginn
122
+      end
123
+
124
+      it 'should be overridden with force_encoding option' do
125
+        huginn = "\u{601d}\u{8003}"
126
+        stub_request(:any, /wrong-encoding/).to_return(:body => {
127
+            :value => huginn,
128
+          }.to_json.encode(Encoding::EUC_JP), :headers => {
129
+            'Content-Type' => 'application/json; UTF-8',
130
+          }, :status => 200)
131
+        site = {
132
+          'name' => "Some JSON Response",
133
+          'expected_update_period_in_days' => 2,
134
+          'type' => "json",
135
+          'url' => "http://wrong-encoding.example.com",
136
+          'mode' => 'on_change',
137
+          'extract' => {
138
+            'value' => { 'path' => 'value' },
139
+          },
140
+          'force_encoding' => 'EUC-JP',
141
+        }
142
+        checker = Agents::WebsiteAgent.new(:name => "Wrong Encoding Site", :options => site)
143
+        checker.user = users(:bob)
144
+        checker.save!
145
+
146
+        checker.check
147
+        event = Event.last
148
+        event.payload['value'].should == huginn
149
+      end
150
+    end
151
+
86 152
     describe '#working?' do
87 153
       it 'checks if events have been received within the expected receive period' do
88 154
         stubbed_time = Time.now